import re
import time
import urllib

import requests
from bs4 import BeautifulSoup

# NOTE(review): `import urllib2` removed -- urllib2 does not exist in
# Python 3 (merged into urllib.request/urllib.error), and it was only
# referenced by py2-only `except X, e` clauses that never matched
# requests' exceptions anyway. The duplicate `import time` is deduped.
#
# Browser-like User-Agent so the forum does not reject the scraper.
# The header name must be 'User-Agent': the original 'user_agent' key
# is not a real HTTP header and was silently ignored by the server.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'}

def get_author(link):
    """Fetch an author's profile page and return the display name.

    Returns the stripped text of the first <div class="left"> on the
    page, 'anonymous' when that element is missing or empty, or None
    when the HTTP request fails.
    """
    try:
        # headers MUST be a keyword argument: the second positional
        # parameter of requests.get is `params`, so the original call
        # never actually sent the User-Agent header.
        r = requests.get(link, headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        # Original caught urllib2.HTTPError (never raised by requests)
        # and then crashed on an undefined `html` variable.
        print(e)
        return None
    soup = BeautifulSoup(r.text, 'html.parser')
    node = soup.find('div', class_='left')
    if node is None:
        # Page layout changed or profile is private -- original code
        # raised AttributeError here.
        return 'anonymous'
    author_name = node.text.strip()
    return author_name if author_name else 'anonymous'

def get_page(link):
    """Return the number of follower-list pages for an author.

    Fetches <link>/follower and scrapes the pagination widget with a
    regex; returns 1 when no pagination is found or the request fails.
    """
    url = link + "/follower"
    try:
        # keyword `headers=` -- passed positionally it becomes `params`
        r = requests.get(url, headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        # Original caught urllib2.HTTPError (unraisable here) and then
        # hit a NameError on the undefined `html`; a single page is the
        # safe fallback.
        print(e)
        return 1
    # The total page count sits in the last pager link before an
    # <input> element -- assumes the 2018-era hupu markup; TODO confirm.
    page_num = re.findall(r'<a .*>(.*?)</a><input .*>', r.text)
    if page_num:
        print("page_num", int(page_num[0]))
        return int(page_num[0])
    print("page_num", 1)
    return 1

def get_href(url, page_num):
    """Collect profile links of all followers across `page_num` pages.

    Returns a de-duplicated list of href strings taken from
    <a class="u"> tags on <url>/follower?page=1 .. page_num.
    Pages that fail to download are skipped (best-effort crawl).
    """
    href_fans = set()
    for page in range(1, page_num + 1):
        url_fans = url + "/follower?page=" + str(page)
        try:
            # keyword `headers=` -- the original positional call sent
            # the headers dict as query parameters instead.
            r = requests.get(url_fans, headers=headers, timeout=10)
        except requests.exceptions.RequestException:
            # Narrowed from bare `except Exception`: only network/HTTP
            # failures should be skipped, not programming errors.
            continue
        print(url_fans + " status", r.status_code)
        soup = BeautifulSoup(r.text, 'html.parser')
        for link in soup('a', class_='u'):
            href_fans.add(link.get('href'))
    return list(href_fans)

def get_info(hrefs):
    """Print the name and personal info for each follower profile URL.

    Best-effort: profiles that fail to download, or whose page lacks the
    expected name/info elements, are skipped instead of crashing.
    """
    for href in hrefs:
        try:
            # keyword `headers=` -- positionally it would be `params`
            r = requests.get(href, headers=headers, timeout=10)
        except requests.exceptions.RequestException:
            continue
        soup = BeautifulSoup(r.text, 'html.parser')
        # Iterate elements directly instead of range(len(...)).
        for panel in soup.find_all('div', class_='personal_right'):
            name = panel.find('div', class_='left')
            info = panel.find('div', class_='personalinfo')
            if name is None or info is None:
                # Original raised AttributeError on missing elements.
                continue
            # Site uses 4-space runs as field separators -- turn them
            # into newlines for readable output.
            info_parser = info.text.replace('    ', '\n')
            print("********************")
            print(name.text.strip())
            print(info_parser.strip())
            print("********************")

def main():
    """Crawl the first 10 pages of the bxj board, then for every author
    found print their fan count and each fan's profile info."""
    href_authors = set()
    for page in range(1, 11):
        # NOTE(review): the original fetched bxj-0 .. bxj-9 while
        # printing page numbers 1..10; bxj-<page> starting at 1 matches
        # the printed numbers and looks intended -- confirm against the
        # live site's pagination scheme.
        url = "https://bbs.hupu.com/bxj-" + str(page)
        try:
            # keyword `headers=` -- positionally requests treats the
            # dict as `params`, so no User-Agent was ever sent.
            r = requests.get(url, headers=headers, timeout=10)
        except requests.exceptions.RequestException as e:
            # Original caught urllib.URLError / urllib2.HTTPError
            # (neither raised by requests) and then hit a NameError on
            # the undefined `html`.
            print(e)
            continue
        print(str(page), "status:", r.status_code)
        soup = BeautifulSoup(r.text, 'html.parser')
        for link in soup.find_all('a', class_="aulink"):
            href_authors.add(link.get('href'))

    for href_author in href_authors:
        author = get_author(href_author)
        if author is None:
            # Profile request failed -- nothing useful to report.
            continue
        fans = get_href(href_author, get_page(href_author))
        print("********************")
        print(author + " " + "Fans sum:" + str(len(fans)))
        print("********************")
        get_info(fans)


if __name__ == '__main__':
    main()

 
 

